/*****************************************************************************************
  Project   : The Impact of Unions on Non-union Wage Setting: Threats and Bargaining
  Authors   : David A. Green, Ben M. Sand, Iain G. Snoddy, Jeanne Tschopp
  Date      : August 2025
  Output    : Data creating city x industry union election wins per establishment
 *****************************************************************************************/

clear
set more off


// Merging CBP with NLRB

// 1 - Deal with NLRB Data - city
// Cleaned data
	use  ${wd}/nlrb/nlrb7710_final_original_farber,clear
	
// Convert Naics to SIC
qui	do "${nlrb}/naics_to_sic_detind.do"

// Can use other elections to get proportion of decertifications
	drop if casetype=="RM"
	keep if closing_method==4 | closing_method==5

// Get success rate of elections 
	g winner=1 if closing_method==4

// Get success rate in terms of employees
	g winning_emp = num_elig_employees if winner == 1

// Number of cases
	gen n = 1
	
// Sum case level to type, ind, year, area level
collapse (sum)  winner  numcase = n numwork = num_elig_employees winning_emp ///
		 , by(detind casetype year $area )

fillin year detind $area casetype

foreach var in winner numwork numcase winning_emp  { 
	replace `var' = 0 if _fillin == 1
	egen `var'_c = sum(`var'), by(year $area casetype)
	}

gen cert1_c  = winner_c / numcase_c
gen cert2_c  = winning_emp_c / numwork_c

foreach var of varlist cert* {
	replace `var' = 0 if missing(`var')
}

drop _fillin 

// shape id
	egen id = group(year $area $ind)
	
drop winner_* winning_emp_* numwork_* numcase_*	
	
// reshape
	reshape wide winner numwork numcase winning_emp cert1_c cert2_c, i(id) j(casetype) string

// clean up
renvars *RD, pref(de_) 	
renvars *RD, postdrop(2)
renvars *RC, postdrop(2)

// 2 - merging with CBP data

if "$area"=="city_plus2" {
	merge 1:1 year city_plus2 detind using ${wd}/cbp/cbp_city_detind.dta,
}
if "$area"=="state" {
	merge 1:1 year state detind using ${wd}/cbp/cbp_state_detind.dta,
}
tab _merge 
drop _merge

// 3 - partisanship data
egen ic = group($area $ind) 
tsset ic (year), yearly 

// Merge partisan ship data
merge m:1 year $area using ${wd}/partisanship/city_partisanship_yearly.dta
tab _merge
drop _merge

//Klarner, Carl, 2013, "State Partisan Balance Data, 1937 - 2011",
merge m:1 year $area using ${wd}/partisanship/city_partisanship2_yearly.dta
tab _merge
drop _merge


// there are no establishments for public sector
keep if detind < 47

// Drivers 1 - measured at national industry level, leave-i-out

// Industry level sums, leave-city-out
foreach var in testab winner numwork numcase   { 
	
	// leave-i-out
	egen lo_`var'_i = sum(`var'), by(year $ind )
	replace lo_`var'_i = lo_`var'_i - `var' 
	
	// i-sums
	egen `var'_i = sum(`var'), by(year $ind )
}

// Proxies for Driver 1 -  national, industry level 

	// Elections per 100 establishments
	gen lo_Elect_Estab_i = lo_numcase_i / ( lo_testab_i  )
	
	gen lo_Wins_Estab_i = lo_winner_i / ( lo_testab_i  )
	
// Proxies for Driver 2 - city level - NOTE: constructed as leave-i-out


// city level sums, leave-ic-out
	foreach var in testab winner  numcase    {   
	
	// leave-ic-out
	egen lo_`var'_c = sum(`var'), by(year $area )
	replace lo_`var'_c = lo_`var'_c - `var' 
	
	// c-sums
	egen `var'_c = sum(`var'), by(year $area )
}

gen lo_Elect_Estab_c = lo_numcase_c / lo_testab_c
gen lo_Wins_Estab_c  = lo_winner_c / lo_testab_c

// Actual threat measure
gen win_frac = winner 

// city and industry total
foreach var in  win_frac testab  { 
	gegen t`var'_c = sum(`var'), by(year $area )
	gegen t`var'_i = sum(`var'), by(year $ind  )
}

foreach var in   win_frac   { 
	gen `var'_ic = `var'  / testab
	gen `var'_c = t`var'_c / ttestab_c
	gen `var'_i = t`var'_i / ttestab_i
	
 }

drop win_frac  t*_c t*_i 
 
tsset
foreach var of varlist win_frac_ic lo_Elect_Estab_i lo_Elect_Estab_c lo_Wins_Estab_i  lo_Wins_Estab_c {  
	gen `var'_5 = 1 - (1-`var') * (1-L.`var') * (1-L2.`var') * (1 - L3.`var') 
	}

foreach var of varlist  govparty_c2  *_rep_frac {
	gen t0 = (`var' + L.`var' + L2.`var'  )/3
	gen t1 = (`var' + L.`var' + L2.`var' + L3.`var' )/4
	gen t2 = (`var' + L.`var' + L2.`var' + L3.`var' + L4.`var')/5
	
	gen m_`var' = t2 if !missing(t2)
	replace m_`var' = t1 if missing(t2) & !missing(t1)
	replace m_`var' = t0 if missing(t2) & missing(t1) &!missing(t0)
	drop t0 t1 t2
}

// Keep relevant years
g year_obs=1 if inlist(year, 1978,1979,1980)
replace year_obs=2 if inlist(year, 1988,1989,1990)
replace year_obs=3 if inlist(year, 1998,1999,2000)
replace year_obs=4 if inlist(year, 2008,2009,2010)

foreach var of varlist *_5 m_* { 
	replace `var' = . if !inlist(year,1980,1990,2000,2010 )
}

drop if year_obs==.
rename year_obs year2

// aggregate
collapse (mean) cert* win_frac_ic_5 lo_Elect_Estab_i_5 lo_Elect_Estab_c_5 lo_Wins_Estab_i_5  lo_Wins_Estab_c_5 ///
				m_senate_rep_frac m_house_rep_frac m_govparty_c2, ///
				by($ind year2 $area )

// Save output
save ${wd}/keyvars/union_elections_${area}_${ind}.dta	, replace

